library(GEOquery)
library(crayon)
library(tidyverse)
library(Seurat)
library(SeuratData)
library(Azimuth)
library(patchwork)
library(Matrix)
library(RCurl)
library(DoubletFinder)
library(scales)
library(SoupX)
library(cowplot)
library(metap)
library(SingleCellExperiment)
library(DropletUtils)
library(AnnotationHub)
library(HGNChelper)
library(ensembldb)
library(multtest)
library(glmGamPoi)
library(pbapply)
library(data.table)
library(ggrepel)
library(ggpubr)
library(stringr)
library(canvasXpress)
library(clinfun)
library(GGally)
library(gridExtra)
library(factoextra)
library(DESeq2)
library(EnhancedVolcano)
library(ComplexHeatmap)
library(limma)
library(fgsea)
library(org.Mm.eg.db)
library(kableExtra)
GEO Accession: GSE250396
Publication Title: Unpublished
Authors: Jason Guo
Publication: N/A
Rationale: Fibrosis, the replacement of healthy tissue with collagen-rich matrix, can occur following injury in almost every organ. Mouse lungs follow stereotyped sequences of fibrogenesis-to-resolution after bleomycin injury, and we reasoned that profiling post-injury histological progression could uncover pro- vs. anti-fibrotic features with functional value for human fibrosis.
We mapped spatiotemporally-resolved transformations in lung extracellular matrix (ECM) architecture to spatially-resolved, multi-omic data. First, we charted stepwise trajectories of matrix aberration vs. resolution using unsupervised machine learning, denoting a reversible transition in uniform-to-disordered histological architecture. Single-cell sequencing along these trajectories identified temporally-enriched “ECM-secreting” (Csmd1+) and “pro-resolving” (Cd248+) fibroblasts, for which Visium inferred divergent histological signatures and spatial-transcriptional “neighborhoods”. Critically, pro-resolving fibroblast instillation helped ameliorate fibrosis in vivo. Further, fibroblast neighborhood-associated moieties, Serpine2 and Pi16, functionally modulated human lung fibrosis ex vivo. Spatial phenotyping of idiopathic pulmonary fibrosis further uncovered analogous fibroblast subtypes and neighborhoods in human disease. Collectively, these findings establish an atlas of pro-/anti-fibrotic factors underlying lung matrix architecture and implicate fibroblast-centered moieties in modulating fibrotic progression vs. resolution.
Methodology: scRNA-seq of unsorted cells from bleomycin-injured mouse lung samples using the 10X genomics platform collected at day 0 (control), day 14 post-injury and day 35 post-injury.
# Specify project file path.
project_path = "/Users/kendrix/Documents/Work/UnityHealth/Data_Projects/scRNAseq_LungFibrosisAnalysis/GSE250396/"
# Load pre-processed Seurat objects.
load(file = paste0(project_path, "data/preprocessed_seurat_obj.RData"))
# Set colour palette for cell type.
celltype_col = c("AT1" = "#2f4f4f",
"AT2" = "#00bfff",
"Basal resting" = "#2e8b57",
"Club (non-nasal)" = "#8b008b",
"Deuterosomal" = "#00ff00",
"Endothelial cells" = "#808000",
"Endothelial lympathic" = "#00008b",
"Fibroblasts" = "#ff4500",
"Fibromyocytes" = "#ffa500",
"Immune" = "#f08080",
"Ionocyte" = "#f0e68c",
"Multiciliated (non-nasal)" = "#ff00ff",
"Myofibroblasts" = "#c71585",
"Neuroendocrine" = "#00fa9a",
"Pericytes" = "#ff1493",
"Suprabasal" = "#eee8aa",
"Transitional Club-AT2" = "#9370db",
"VSMCs" = "#00ffff")
# Set new name for Seurat object to be more informative.
seurat_obj = seurat_cluster
# Collapse different types of endothelial cells into generic endothelial cells.
grep("Endothelial", seurat_obj$cell_type, value = TRUE) %>% unique()
## [1] "Endothelial arterial" "Endothelial venous" "Endothelial capillary"
## [4] "Endothelial lymphatic"
seurat_obj$cell_type <- gsub("Endothelial venous|Endothelial capillary|Endothelial arterial",
"Endothelial cells",
seurat_obj$cell_type)
# Collapse fibroblasts and myofibroblasts into fibroblasts.
grep("ibroblast", seurat_obj$cell_type, value = TRUE) %>% unique()
## [1] "Fibroblasts" "Myofibroblasts"
seurat_obj$cell_type <- gsub("Myofibroblasts",
"Fibroblasts",
seurat_obj$cell_type)
# Set Seurat object idents.
Idents(seurat_obj) <- "cell_type"
# Set max overlaps to infinity globally.
options(ggrepel.max.overlaps = Inf)
# Remove unneeded objects.
rm(seurat_cluster)
# Get the variables of interest.
summary_table <- (FetchData(seurat_obj,
vars = c("cell_type", "treatment", "timepoint", "Antxr1")))
# Each timepoint contains unsorted lung tissues from a single mouse.
# Define ANTXR1 expression based on 0 expression threshold.
summary_table <- summary_table %>% mutate(Antxr1_expressed = Antxr1 > 0)
# Calculate total cells per timepoint.
total_cells_timepoint <- (summary_table %>%
group_by(timepoint) %>%
summarise(total_cells_timepoint = n(),
.groups = "drop"))
# Summarise cell counts per timepoint for each cell type.
celltype_timepoint_counts <- (summary_table %>%
group_by(cell_type, timepoint) %>%
summarise(celltype_count = n(),
Antxr1_total_exprs = sum(Antxr1_expressed),
.groups = "drop") %>%
left_join(total_cells_timepoint, by = "timepoint"))
# Calculate percentages.
celltype_timepoint_counts <- (celltype_timepoint_counts %>%
mutate(celltype_by_totalcells =
(celltype_count/total_cells_timepoint)*100,
Antxr1_by_totalcells =
(Antxr1_total_exprs/total_cells_timepoint)*100,
Antxr1_by_celltypes =
(Antxr1_total_exprs/celltype_count)*100))
# Remove unneeded objects.
rm(total_cells_timepoint)
# Visualize the global cell proportion as a UMAP.
p1 <- DimPlot(seurat_obj,
reduction = "umap",
group.by = "cell_type",
split.by = "timepoint",
cols = celltype_col,
label = TRUE,
label.size = 3,
label.color = "#02066f",
repel = TRUE,
order = TRUE,
raster = FALSE) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 15)) +
NoLegend()
# Visualize cell proportion counts as barplot.
p2 <- celltype_timepoint_counts %>%
mutate(celltype_by_totalcells = plyr::round_any(celltype_by_totalcells,
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = celltype_by_totalcells,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = celltype_by_totalcells,
vjust = 0.5,
hjust = -0.5),
size = 3,
angle = 90) +
facet_wrap(~timepoint) +
labs(x = "Cell type",
y = "% aggregated cell type for each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 100) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 90,
hjust = 1,
vjust = 0.5),
strip.text = element_text(size = 15))
# Visualize both plots together.
grid.arrange(p1, p2, ncol = 1)
# Define list of cell types for each subgroups.
AT2 = WhichCells(seurat_obj, idents = c("AT2"))
fibroblasts = WhichCells(seurat_obj, idents = c("Fibroblasts"))
endothelial = WhichCells(seurat_obj, idents = c("Endothelial cells"))
VSMCs = WhichCells(seurat_obj, idents = c("VSMCs"))
# Visualize the global cell proportion as a UMAP.
p1 <- DimPlot(seurat_obj,
reduction = "umap",
group.by = "cell_type",
split.by = "timepoint",
cells.highlight = list(AT2,
fibroblasts,
endothelial,
VSMCs),
cols.highlight = c("#00ffff", "#808000", "#ff4500", "#00bfff"),
label = TRUE,
label.size = 3,
label.color = "#02066f",
repel = TRUE,
order = TRUE,
raster = FALSE) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 15)) +
NoLegend()
# Visualize cell proportion counts as barplot.
p2 <- celltype_timepoint_counts %>%
dplyr::filter(cell_type %in% c("AT2",
"Fibroblasts",
"Endothelial cells",
"VSMCs")) %>%
mutate(celltype_by_totalcells = plyr::round_any(celltype_by_totalcells,
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = celltype_by_totalcells,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = celltype_by_totalcells,
vjust = 0.5,
hjust = -0.5),
size = 3,
angle = 90) +
facet_wrap(~timepoint) +
labs(x = "Cell type",
y = "% aggregated cell type for each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 50) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 45,
hjust = 1,
vjust = 1),
strip.text = element_text(size = 15))
# Visualize both plots together.
grid.arrange(p1, p2, ncol = 1)
Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis
# Visualize the expressions of all cell type clusters.
VlnPlot(seurat_obj,
features = c("Acta2", "Col1a2", "Col3a1", "Col1a1", "Antxr1"),
group.by = "cell_type",
split.by = "treatment",
split.plot = FALSE,
stack = TRUE,
flip = TRUE,
same.y.lims = TRUE,
ncol = 1,
raster = FALSE) +
theme(legend.position = "top") +
stat_summary(fun.y = median,
geom = "point",
size = 1,
color = "#000001",
position = position_dodge(0.9))
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis
# Visualize the expressions of all cell type clusters.
VlnPlot(seurat_obj,
features = c("Acta2", "Col1a2", "Col3a1", "Col1a1", "Antxr1"),
group.by = "cell_type",
split.by = "timepoint",
split.plot = FALSE,
stack = TRUE,
flip = TRUE,
same.y.lims = TRUE,
ncol = 1) +
theme(legend.position = "top") +
stat_summary(fun.y = median,
geom = "point",
size = 1,
color = "#000001",
position = position_dodge(0.9))
Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis
# Visualize the ANTXR1+ cell type clusters.
FeaturePlot(seurat_obj,
reduction = "umap",
features = "Antxr1",
split.by = "timepoint",
pt.size = 0.8,
label = TRUE,
label.size = 3,
repel = TRUE,
order = TRUE,
cols = c("#d3d3d3", "#edc9af", "#6e260e")) +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 15))
# Visualize ANTXR1 expressed by total cells as barplot.
celltype_timepoint_counts %>%
mutate(Antxr1_by_totalcells = plyr::round_any(Antxr1_by_totalcells,
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = Antxr1_by_totalcells,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = Antxr1_by_totalcells,
vjust = 0.5,
hjust = -0.5),
size = 3,
angle = 90) +
facet_wrap(~timepoint) +
labs(x = "Cell type",
y = "% ANTXR1+ expressed per total cells for each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 50) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 90,
hjust = 1,
vjust = 0.5),
strip.text = element_text(size = 15))
# Visualize ANTXR1 expressed by total cells as barplot in selected cell types.
celltype_timepoint_counts %>%
dplyr::filter(cell_type %in% c("AT2",
"Fibroblasts",
"Endothelial cells",
"VSMCs")) %>%
mutate(Antxr1_by_totalcells = plyr::round_any(Antxr1_by_totalcells,
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = Antxr1_by_totalcells,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = Antxr1_by_totalcells,
vjust = 0.5,
hjust = -0.5),
size = 3,
angle = 90) +
facet_wrap(~timepoint) +
labs(x = "Cell type",
y = "% ANTXR1+ expressed per total cells for each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 20) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 45,
hjust = 1,
vjust = 1),
strip.text = element_text(size = 15))
Acta2: a-SMA marker Col1a1 & Col1a2: Collagen type I marker Col3a1: Collagen type III marker Cthrc1: Prognostic biomarker for fibrosis
# Visualize the ANTXR1+ cell type clusters.
FeaturePlot(seurat_obj,
reduction = "umap",
features = "Antxr1",
split.by = "timepoint",
pt.size = 0.8,
label = TRUE,
label.size = 3,
repel = TRUE,
order = TRUE,
cols = c("#d3d3d3", "#edc9af", "#6e260e")) +
theme(axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 15))
# Visualize ANTXR1 expressed by total cells per cell type as barplot.
celltype_timepoint_counts %>%
mutate(Antxr1_by_celltypes = plyr::round_any(Antxr1_by_celltypes,
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = Antxr1_by_celltypes,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = Antxr1_by_celltypes,
vjust = 0.5,
hjust = -0.5),
size = 3,
angle = 90) +
facet_wrap(~timepoint) +
labs(x = "Cell type",
y = "% ANTXR1+ expressed per total cells
for each cell type at each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 110) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 90,
hjust = 1,
vjust = 0.5),
strip.text = element_text(size = 15))
# Visualize ANTXR1 expressed by total cells per cell type as barplot in select cell types.
celltype_timepoint_counts %>%
dplyr::filter(cell_type %in% c("AT2",
"Fibroblasts",
"Endothelial cells",
"VSMCs")) %>%
mutate(Antxr1_by_celltypes = plyr::round_any(Antxr1_by_celltypes,
accuracy = 0.01,
f = ceiling)) %>%
ggplot(aes(x = cell_type,
y = Antxr1_by_celltypes,
fill = cell_type)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = Antxr1_by_celltypes,
vjust = 0.5,
hjust = -0.5),
size = 3,
angle = 90) +
facet_wrap(~timepoint) +
labs(x = "Cell type",
y = "% ANTXR1+ expressed per total cells
for each cell type at each timepoint
(Blank = > 0.001 or 0.1%)",
fill = element_blank()) +
ylim(0, 110) +
scale_fill_manual(values = celltype_col) +
theme_linedraw() +
theme(axis.text.x = element_text(size = 12,
angle = 45,
hjust = 1,
vjust = 1),
strip.text = element_text(size = 15))
# Subset cell type of interest - AT2 cells.
AT2_cells <- subset(seurat_obj, idents = "AT2", invert = FALSE)
AT2_cells$treatment <- factor(AT2_cells$treatment, levels = c("none (control)", "bleomycin"))
# Make expression comparisons between metadata of interest.
source("/Users/kendrix/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = AT2_cells,
gene_signature = c("Antxr1"),
test_sign = list(c("post-injury day 0", "post-injury day 14"),
c("post-injury day 14", "post-injury day 35"),
c("post-injury day 0", "post-injury day 35")),
group_name = "timepoint",
title = "ANTXR1 pairwise comparisons between control and BLM-treated
AT2 cells at different timepoints",
x_angle = 0,
hjust = 0.5,
vjust = 1)
VlnPlot_stat(object = AT2_cells,
gene_signature = c("Antxr1"),
test_sign = list(c("none (control)", "bleomycin")),
group_name = "treatment",
title = "ANTXR1 pairwise comparisons between control and
BLM-treated AT2 cells",
x_angle = 0,
hjust = 0.5,
vjust = 1)
# Remove unneeded objects.
rm(AT2_cells)
# Subset cell type of interest - Fibroblasts.
Fibro_cells <- subset(seurat_obj, idents = c("Fibroblasts"), invert = FALSE)
Fibro_cells$treatment <- factor(Fibro_cells$treatment, levels = c("none (control)", "bleomycin"))
# Make expression comparisons between metadata of interest.
source("/Users/kendrix/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = Fibro_cells,
gene_signature = c("Antxr1"),
test_sign = list(c("post-injury day 0", "post-injury day 14"),
c("post-injury day 14", "post-injury day 35"),
c("post-injury day 0", "post-injury day 35")),
group_name = "timepoint",
title = "ANTXR1 pairwise comparisons between control and BLM-treated
fibroblasts at different timepoints",
x_angle = 0,
hjust = 0.5,
vjust = 1)
VlnPlot_stat(object = Fibro_cells,
gene_signature = c("Antxr1"),
test_sign = list(c("none (control)", "bleomycin")),
group_name = "treatment",
title = "ANTXR1 pairwise comparisons between control and
BLM-treated fibroblasts",
x_angle = 0,
hjust = 0.5,
vjust = 1)
# Remove unneeded objects.
rm(Fibro_cells)
# Subset cell type of interest - Endothelial cells.
EC_cells <- subset(seurat_obj,
idents = c("Endothelial cells"),
invert = FALSE)
EC_cells$treatment <- factor(EC_cells$treatment, levels = c("none (control)", "bleomycin"))
# Make expression comparisons between metadata of interest.
source("/Users/kendrix/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = EC_cells,
gene_signature = c("Antxr1"),
test_sign = list(c("post-injury day 0", "post-injury day 14"),
c("post-injury day 14", "post-injury day 35"),
c("post-injury day 0", "post-injury day 35")),
group_name = "timepoint",
title = "ANTXR1 pairwise comparisons between control and BLM-treated
endothelial cells at different timepoints",
x_angle = 0,
hjust = 0.5,
vjust = 1)
VlnPlot_stat(object = EC_cells,
gene_signature = c("Antxr1"),
test_sign = list(c("none (control)", "bleomycin")),
group_name = "treatment",
title = "ANTXR1 pairwise comparisons between control and
BLM-treated endothelial cells",
x_angle = 0,
hjust = 0.5,
vjust = 1)
# Remove unneeded objects.
rm(EC_cells)
# Subset cell type of interest - VSMCs.
VSMCs <- subset(seurat_obj, idents = c("VSMCs"), invert = FALSE)
VSMCs$treatment <- factor(VSMCs$treatment, levels = c("none (control)", "bleomycin"))
# Make expression comparisons between metadata of interest.
source("/Users/kendrix/Documents/Work/UnityHealth/Data_Projects/VlnPlot_stat.R")
VlnPlot_stat(object = VSMCs,
gene_signature = c("Antxr1"),
test_sign = list(c("post-injury day 0", "post-injury day 14"),
c("post-injury day 14", "post-injury day 35"),
c("post-injury day 0", "post-injury day 35")),
group_name = "timepoint",
title = "ANTXR1 pairwise comparisons between control and BLM-treated
VSMCs at different timepoints",
x_angle = 0,
hjust = 0.5,
vjust = 1)
VlnPlot_stat(object = VSMCs,
gene_signature = c("Antxr1"),
test_sign = list(c("none (control)", "bleomycin")),
group_name = "treatment",
title = "ANTXR1 pairwise comparisons between control and
BLM-treated VSMCs",
x_angle = 0,
hjust = 0.5,
vjust = 1)
# Remove unneeded objects.
rm(VSMCs)
Analysis codes are adapted from HBCtraining and Ouyang Lab with credits going to the original authors of the publications cited in the book.